package com.lixuwei.study.htmlutil.seleniumhq;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.openqa.selenium.*;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeDriverService;
import org.openqa.selenium.chrome.ChromeOptions;

import java.io.File;
import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small Selenium/ChromeDriver demo that opens a mobile shop page in headless
 * Chrome, scrolls it repeatedly so more items are rendered, collects the
 * {@code id} attributes of the item container's child {@code div}s and
 * extracts the numeric goods ids from them.
 */
public class HelloSelenium {

    /** Matches item element ids of the form {@code mp-mgl-0-0-0-g-<digits>}; group 1 is the goods id. */
    private static final Pattern GOODS_ID_PATTERN = Pattern.compile("mp-mgl-0-0-0-g-(\\d+)");

    /** Element id the scrape loop treats as the "end of list" marker. */
    private static final String END_OF_LIST_ID = "mp-mgl-0-0-0-lbh";

    /** XPath selecting the direct child divs of the item list container. */
    private static final String ITEM_DIVS_XPATH = "//*[@id=\"mp-mgl-0-0-0\"]/div";

    /** Upper bound on the number of scroll-and-collect rounds. */
    private static final int MAX_SCROLL_REQUESTS = 20;

    /**
     * Runs the scrape and prints every goods id found, one per line.
     *
     * @param args unused
     * @throws IOException          if the driver service cannot be started
     * @throws InterruptedException if the thread is interrupted while waiting for the page
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        Set<String> strings = parsePdd();
        for (String string : strings) {
            System.out.println(string);
        }
    }

    /**
     * Loads the hard-coded shop page in headless Chrome (mobile emulation,
     * images disabled, through a fixed HTTP/SSL proxy), scrolls down in
     * doubling steps and collects element ids until the end marker appears,
     * a round yields no new ids, or {@link #MAX_SCROLL_REQUESTS} rounds have run.
     *
     * <p>The browser session and the driver service are always shut down
     * before this method returns or throws.
     *
     * @return the set of numeric goods ids extracted from the collected element ids
     * @throws IOException          if the driver service cannot be started
     * @throws InterruptedException if the thread is interrupted during one of the fixed waits
     */
    public static Set<String> parsePdd() throws IOException, InterruptedException{
        Set<String> ids = new HashSet<>();
        ChromeDriverService chromeDriverService = createAndStartService();
        ChromeDriver webDriver = null;
        try {
            Map<String, String> mobileEmulation = new HashMap<>();
            mobileEmulation.put("deviceName", "iPhone 6");

            Map<String, Integer> prefs = new HashMap<>();
            prefs.put("profile.managed_default_content_settings.images", 2); // do not load images

            ChromeOptions chromeOptions = new ChromeOptions();
            chromeOptions.addArguments("--headless");
            chromeOptions.addArguments("--disable-gpu"); // applicable to windows os only
            chromeOptions.addArguments("--disable-dev-shm-usage"); // overcome limited resource problems
            chromeOptions.addArguments("--no-sandbox"); // Bypass OS security model
            chromeOptions.setExperimentalOption("mobileEmulation", mobileEmulation);
            chromeOptions.setExperimentalOption("prefs", prefs);

            Proxy proxy = new Proxy();
            proxy.setHttpProxy("202.112.237.102:3128").setSslProxy("202.112.237.102:3128");
            chromeOptions.setProxy(proxy);

            webDriver = new ChromeDriver(chromeDriverService, chromeOptions);
            // Alternative shop whose "all goods" list spans several pages:
            //   http://mobile.yangkeduo.com/mall_page.html?mall_id=22931&item_index=0&sp=0
            // The shop below has a single page of goods.
            String url = "http://mobile.yangkeduo.com/mall_page.html?mall_id=805812660&item_index=0&sp=0";
            webDriver.get(url);

            // Fixed wait to give the page time to render before scraping.
            Thread.sleep(10000);

            int ypos = 800;
            boolean fetch = true;
            // Bounded loop: the counter advances on every round, so a future
            // retry/continue path cannot spin forever.
            for (int requestTimes = 0; fetch && requestTimes < MAX_SCROLL_REQUESTS; requestTimes++) {
                int oldSize = ids.size();
                // Scroll down so that more items get rendered.
                webDriver.executeScript("window.scrollTo(0,"+ypos+")");
                Thread.sleep(3000);
                List<WebElement> divs = webDriver.findElements(By.xpath(ITEM_DIVS_XPATH));
                for (WebElement div : divs) {
                    String id = div.getAttribute("id");
                    if (StringUtils.isNotEmpty(id)) {
                        ids.add(id);
                    }
                    // Constant-first equals: id may be null for divs without an id attribute.
                    if (END_OF_LIST_ID.equals(id)) {
                        System.out.println("到底了");
                        fetch = false;
                        break;
                    }
                    // TODO: detect an error element here and retry the round.
                }
                System.out.println(divs.size());
                // TODO: if the site throttles the request and returns an error, retry this round.

                // If a round adds no new ids, assume the bottom of the list was reached.
                if (oldSize == ids.size()) {
                    System.out.println("--------same-------");
                    break;
                }
                System.out.println("times " + requestTimes);
                ypos = ypos * 2;
            }

        } finally {
            try {
                // End the browser session; otherwise the Chrome process is left running.
                if (webDriver != null) {
                    webDriver.quit();
                }
            } finally {
                // Stop the service even if quitting the browser failed.
                chromeDriverService.stop();
            }
        }
        return parseGoodsId(ids);
    }

    /**
     * Extracts the numeric goods id from every element id that matches
     * {@link #GOODS_ID_PATTERN}; ids that do not match are ignored.
     *
     * @param ids element ids collected from the page
     * @return the distinct, non-empty goods ids
     */
    private static Set<String> parseGoodsId(Set<String> ids) {
        Set<String> goodsIds = new HashSet<>();
        for (String id : ids) {
            Matcher matcher = GOODS_ID_PATTERN.matcher(id);
            if (matcher.find()) {
                goodsIds.add(matcher.group(1));
            }
        }
        return goodsIds;
    }


    /**
     * Takes a screenshot of the current page and copies it to {@code fileName}.
     * Best-effort: a failure to copy the file is printed and otherwise ignored.
     *
     * @param webDriver driver to capture from; must also implement {@link TakesScreenshot}
     * @param fileName  destination file path
     */
    public static void takeScreenshot(WebDriver webDriver, String fileName) {
        File screenshot = ((TakesScreenshot) webDriver).getScreenshotAs(OutputType.FILE);
        try {
            FileUtils.copyFile(screenshot, new File(fileName));
        } catch (IOException e) {
            // Screenshots are a debugging aid only, so do not fail the scrape.
            e.printStackTrace();
        }
    }

    /**
     * Builds a {@link ChromeDriverService} on any free port using a
     * chromedriver executable chosen by the {@code os.name} system property
     * (Windows, Mac, or a Linux default path), and starts it.
     *
     * @return the started service; the caller is responsible for stopping it
     * @throws IOException if the service cannot be started
     */
    public static ChromeDriverService createAndStartService() throws IOException {
        String property = System.getProperty("os.name");
        File file;
        if (property != null && property.startsWith("Windows")) {
            file = new File("E:\\chromedriver_win32\\chromedriver.exe");
        } else if (property != null && property.startsWith("Mac")) {
            file = new File("/usr/local/bin/chromedriver");
        } else {
            // Default: linux chromedriver location.
            file = new File("/home/headless/chromedriver");
        }
        ChromeDriverService service = new ChromeDriverService.Builder()
                    .usingDriverExecutable(file)
                    .usingAnyFreePort()
                    .build();
        service.start();
        return service;
    }
}
